library(dplyr)
library(cregg)
library(ggplot2)
library(grDevices)

estimate_marginal_means <- function(experiment_data){
  formula_program <- num_prog ~ local_status + party
  formula_pork <- num_pork ~ local_status + party
  
  
  programmatic_outcome <- cregg::cj(
    formula = formula_program,
    data = experiment_data,
    id = ~respondent,
    estimate = "mm"
  )
  
  
  pork_outcome <- cregg::cj(
    formula = formula_pork,
    data = experiment_data,
    id = ~respondent,
    estimate = "mm"
  )
  
  second_experiment_result <- rbind(programmatic_outcome, pork_outcome)
  
  return(second_experiment_result)
  
}


format_cregg_output_for_plot <- function(second_experiment_result){
  
  
  second_experiment_result$outcome <- as.factor(second_experiment_result$outcome)
  levels(second_experiment_result$outcome) <- c("Number of Particularistic Policies Selected", "Number of Programmatic Policies Selected")
  levels(second_experiment_result$level) <- c("Elite", "Local", "Non-Local", "DPP", "LDP", "JCP", "CDPJ")
  
  return(second_experiment_result)
}

make_policy_type_selection_plot <- function(plot_data, main_paper_version = TRUE){
  
  if (main_paper_version){
    plot_data <- plot_data %>% 
      filter(feature == "local_status")
  }
  
  
  policy_selection_plot <- ggplot(data = plot_data, aes(x = estimate, y = level, xmin = lower, xmax = upper)) +
    geom_point() + 
    geom_errorbar(width = 0) + 
    geom_vline(xintercept = 1.5, linetype = "dashed") + 
    facet_wrap(. ~ outcome, ncol = 1) + 
    scale_y_discrete(name = "Type of Candidate") + 
    scale_x_continuous(name = "Marginal Mean of Number of Policies Selected", limits = c(0.75, 2.)) +
    theme_bw()
  
  return(policy_selection_plot)
  
}

make_data_for_individual_items_comparison <- function(experiment_data){
  # this prepares data for plots that show the MMs by individual items in the
  # appendix 
  
  kigyoyuchi_formula <-  kigyo_yuchi ~ local_status + party
  kanko_sokushin_formula <-  kanko_sokushin ~ local_status + party
  kogyojigyo_formula <-  kogyojigyo ~ local_status + party
  seifusaimu_formula <-  seifusaimu ~ local_status + party
  gaiko_formula <-  gaiko ~ local_status + party
  nenkin_formula <-  nenkin ~ local_status + party
  
  call_cregg_for_given_formula <- function(x) cregg::cj(
    formula = x,
    data = experiment_data,
    id = ~respondent,
    estimate = "mm"
  )
  
  formulas <- list(kigyoyuchi_formula,
                   kanko_sokushin_formula,
                   kogyojigyo_formula,
                   seifusaimu_formula,
                   gaiko_formula,
                   nenkin_formula)
  
  cregg_objects <- lapply(formulas, call_cregg_for_given_formula)
  
  return(cregg_objects)
  
}

make_individual_policy_items_plot <- function(experiment_data){
  
  individual_item_mms <- make_data_for_individual_items_comparison(experiment_data)
  plot_data <- data.table::rbindlist(individual_item_mms)
  
  plot_data <- plot_data %>%
    filter(feature == "local_status")
  
  plot_data <- plot_data %>% 
    mutate(outcome = recode(
      outcome, 
      "gaiko" = "Foreign Affairs", 
      "kanko_sokushin" = "Promotion of Tourism",
      "kigyo_yuchi" = "Attraction of Businesses",
      "kogyojigyo" = "Public Works",
      "nenkin" = "National Pension Plan",
      "seifusaimu" = "National Debt"))
  
  pd <- position_dodge(0.1)
  
  ggplot(data = plot_data, aes(x = estimate, y = level, xmin = lower, xmax = upper, col = outcome)) +
    geom_point(position = pd) + 
    geom_errorbar(width = 0, position = pd) + 
    geom_vline(xintercept =0.5, linetype = "dashed") + 
    scale_y_discrete(name = "Type of Candidate") + 
    scale_x_continuous(name = "Marginal Mean of Policy Selection") +
    theme_bw()
}

path_to_data <- "data"
formatted_survey_file <- "follow_up_survey_formatted.rds"

path_to_figures <- "figures"

path_to_appendix_figures <- "appendix_plots"
mm_plot_file <- "num_policies_selected_long_responses.eps"
duration_plot <- "followup_response_time.eps"

formatted_data <- readRDS(file.path(path_to_data, formatted_survey_file))
formatted_data$duration <- as.numeric(formatted_data$duration)

# Make plot of survey duration 
survey_duration_plot <- formatted_data %>% 
  distinct(ResponseId, .keep_all = TRUE) %>% 
  filter(duration < 2500) %>%
  ggplot(aes(x = as.numeric(duration))) + 
  geom_histogram(alpha = 0.8) + 
  scale_x_continuous(name = "Survey Response Duration in Seconds", breaks = seq(0, 4000, 500)) + 
  scale_y_continuous(name = "Number of Respondents") + 
  theme_bw()

formatted_data <- formatted_data %>% filter(duration > 240)

estimated_mm <- estimate_marginal_means(formatted_data)
plot_data_formatted <- format_cregg_output_for_plot(estimated_mm)

policy_selection_plot <- make_policy_type_selection_plot(plot_data_formatted)

ggsave(
  filename = file.path(path_to_figures, path_to_appendix_figures, mm_plot_file),
  plot = policy_selection_plot,
  height = 8, 
  width = 6,
  units = "in",
  device = cairo_ps,
  fallback_resolution = 200)

ggsave(
  filename = file.path(path_to_figures, path_to_appendix_figures, duration_plot),
  plot = survey_duration_plot,
  height = 8, 
  width = 6,
  units = "in",
  device = cairo_ps,
  fallback_resolution = 200)
